library(tidyr)
library(dplyr)
library(ggplot2)

# Replace each `eval_valuation` score with a labelled bucket stored as an
# ordered factor. Exact 0 and exact 1 get their own buckets; scores strictly
# between them fall into one of three sub-ranges. Anything that matches no
# condition (including NA) becomes NA.
data_grouped <- data %>%
  mutate(
    eval_valuation = ordered(
      case_when(
        eval_valuation == 0 ~ "0.0",
        eval_valuation > 0 & eval_valuation <= 1/3 ~ "(0, 1/3]",
        eval_valuation > 1/3 & eval_valuation <= 2/3 ~ "(1/3, 2/3]",
        eval_valuation > 2/3 & eval_valuation < 1 ~ "(2/3, 1)",
        eval_valuation == 1 ~ "1.0",
        .default = NA
      ),
      # Explicit levels fix the bucket order and keep buckets that never
      # occur in the data as factor levels.
      levels = c("0.0", "(0, 1/3]", "(1/3, 2/3]", "(2/3, 1)", "1.0")
    )
  )


# Count the rows in each (module, agent_name, score bucket) combination.
# `.groups = "drop"` makes summarize() return an ungrouped tibble directly,
# so no separate ungroup() step is needed and the
# "`summarise()` has grouped output by ..." message is no longer emitted.
data_grouped <- data_grouped %>%
  group_by(module, agent_name, eval_valuation) %>%
  summarize(count = n(), .groups = "drop") %>%
  # Add a zero-count row for every module/agent/bucket combination that does
  # not occur in the data, so each facet of the plot shows every bucket.
  complete(module, agent_name, eval_valuation, fill = list(count = 0)) %>%
  arrange(agent_name, module)
print(data_grouped)
## # A tibble: 40 × 4
##    module                   agent_name        eval_valuation count
##    <chr>                    <chr>             <ord>          <int>
##  1 Gene alias               Dummy Agent GPT35 0.0               45
##  2 Gene alias               Dummy Agent GPT35 (0, 1/3]           0
##  3 Gene alias               Dummy Agent GPT35 (1/3, 2/3]         0
##  4 Gene alias               Dummy Agent GPT35 (2/3, 1)           0
##  5 Gene alias               Dummy Agent GPT35 1.0                5
##  6 Gene disease association Dummy Agent GPT35 0.0               24
##  7 Gene disease association Dummy Agent GPT35 (0, 1/3]           2
##  8 Gene disease association Dummy Agent GPT35 (1/3, 2/3]         4
##  9 Gene disease association Dummy Agent GPT35 (2/3, 1)           1
## 10 Gene disease association Dummy Agent GPT35 1.0               19
## # ℹ 30 more rows
# data_wide <- pivot_wider(data %>% select(-filename, -agent_answer), names_from = "agent_name", values_from = c(eval_valuation, agent_answer_num_function_calls))
# print(head(data_wide))

# Bar chart of question counts per score bucket, drawn as a grid of panels
# with one row per module and one column per agent.
ggplot(
  data_grouped,
  aes(x = eval_valuation, y = count, fill = eval_valuation)
) +
  # Counts are already aggregated, so draw bar heights straight from `count`.
  geom_col() +
  facet_grid(rows = vars(module), cols = vars(agent_name)) +
  # NOTE(review): with fixed scale limits, a bar taller than 50 would be
  # dropped from the plot rather than clipped; confirm 50 is the maximum
  # possible count per panel.
  scale_y_continuous(limits = c(0, 50)) +
  labs(
    title = "Score distributions by task and agent type",
    x = "Score Bucket",
    y = "Count of Questions"
  ) +
  # Fill only duplicates the x axis, so its legend is hidden.
  guides(fill = "none") +
  theme_minimal() +
  # Rotate the bucket labels so they do not overlap in narrow panels.
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown, see <http://rmarkdown.rstudio.com>.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

## Warning in geom_hline(aes(yintercept = mean(eval_valuation), x = agent_name)):
## Ignoring unknown aesthetics: x
## Warning in geom_point(aes(alpha = 0.6, customdata =
## "https://github.com/monarch-initiative/oai-plugin-evals/blob/main/results/" %+%
## : Ignoring unknown aesthetics: customdata and text